spam

In [1]:
# Silence all Python warnings for a cleaner rendered notebook.
# NOTE(review): a blanket 'ignore' also hides deprecation warnings
# (e.g. pandas API changes) — consider narrowing the filter.
import warnings
warnings.filterwarnings('ignore')

Datos grupo 1 > Modelo 1: Modelo Current (Rafa) Año¶

1. Importing libraries and loading data¶

In [2]:
# Basic libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Normalization  libraries
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error, r2_score

# Tensorflow with keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Easy graphs with plotly
import plotly.express as px
import plotly.graph_objects as go

# Matplotlib plots look like
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (15,7)
import pickle as pk

#import scipy
from scipy import stats
2023-03-23 23:10:28.145557: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-23 23:10:45.909617: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.2/extras/CUPTI/lib64:/usr/local/cuda-11.2/lib64
2023-03-23 23:10:45.932127: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.2/extras/CUPTI/lib64:/usr/local/cuda-11.2/lib64
2023-03-23 23:10:45.932141: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.
In [3]:
# Load the train / validation / test tensors produced in notebook JNB1.
_split_dir = './outputs/split/'
X_train, X_val, X_test, y_train, y_val, y_test = (
    np.load(_split_dir + name + '.npy')
    for name in ('X_train', 'X_val', 'X_test', 'y_train', 'y_val', 'y_test')
)

2. Setting the model¶

2.1 Setting the parameters according to the tensors created in JNB1
In [4]:
# Window configuration — must match the tensors built in JNB1.
n_pasado = 24 * 7 + 12   # look-back window: 7 days + 12 h = 180 hourly steps
n_futuro = 24            # forecast horizon: the next 24 hours
n_salto = 12             # gap (in hours) between the window and the forecast

# Feature (attribute) names, in tensor column order.
nom_attr = [
    'Energy_Demand',
    'Month',
    'Day',
    'Hour',
    'PC1_Weather',
    'PC2_Weather',
    'Monday_Holiday',
    'Tuesday_Aft_Hol',
    'Easter_week',
    'May_1s',
    'May_10t',
    'Sept_16',
    'Nov_2nd',
    'Before_Christmas_NY',
    'Christmas_NY',
    'After_Christmas_NY',
]
n_attr = len(nom_attr)   # number of input features per time step
2.2 Configuring the LSTM Encoder-Decoder model

MODEL

drawing

LSTM UNIT

drawing

Such that:

  • Forget Gate $F$: a NN with sigmoid
  • Candidate layer $C$: a NN with Tanh
  • Input Gate $I$: a NN with sigmoid
  • Output Gate $O$: a NN with sigmoid
  • Hidden state $h$: a vector, in our case we set $h\in\mathbb{R}^{25}$ (the LSTM layers use 25 units)
  • Memory state $c$: a vector, in our case we set $c\in\mathbb{R}^{25}$
In [5]:
# Configuration of the encoder.
# Input: one window of n_pasado (=180) hourly steps x n_attr (=16) features.
encoder_inputs = layers.Input(shape=(n_pasado, n_attr))

encoder_l1 = layers.LSTM(25, return_state=True)# 25: dimension of the hidden/memory state vectors
encoder_outputs1 = encoder_l1(encoder_inputs)

# With return_state=True the LSTM returns [output, state_h, state_c];
# keep only the final (h, c) states to initialize the decoder.
encoder_states1 = encoder_outputs1[1:]
2023-03-23 23:11:56.225538: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-23 23:12:00.124718: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 30972 MB memory:  -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:1a:00.0, compute capability: 7.0
In [6]:
# Configuration of the decoder (the original comment said "encoder" — copy-paste).
decoder_rvec = layers.RepeatVector(n_futuro) # repeat the encoder output n_futuro (=24) times, one copy per forecast step
decoder_inputs = decoder_rvec(encoder_outputs1[0])

# Decoder LSTM seeded with the encoder's final (h, c) states.
decoder_l1 = layers.LSTM(25, return_sequences=True)
decoder_l1_output = decoder_l1(decoder_inputs, initial_state=encoder_states1)

# One Dense(1) applied at every time step -> one scalar forecast per hour.
decoder_l2 = layers.TimeDistributed(layers.Dense(1)) # just one dense layer
decoder_outputs = decoder_l2(decoder_l1_output)
In [7]:
# Assemble the full encoder-decoder model and display its architecture.
modelo = keras.models.Model(encoder_inputs, decoder_outputs)
modelo.summary()
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
==================================================================================================
 input_1 (InputLayer)           [(None, 180, 16)]    0           []                               
                                                                                                  
 lstm (LSTM)                    [(None, 25),         4200        ['input_1[0][0]']                
                                 (None, 25),                                                      
                                 (None, 25)]                                                      
                                                                                                  
 repeat_vector (RepeatVector)   (None, 24, 25)       0           ['lstm[0][0]']                   
                                                                                                  
 lstm_1 (LSTM)                  (None, 24, 25)       5100        ['repeat_vector[0][0]',          
                                                                  'lstm[0][1]',                   
                                                                  'lstm[0][2]']                   
                                                                                                  
 time_distributed (TimeDistribu  (None, 24, 1)       26          ['lstm_1[0][0]']                 
 ted)                                                                                             
                                                                                                  
==================================================================================================
Total params: 9,326
Trainable params: 9,326
Non-trainable params: 0
__________________________________________________________________________________________________

3. Training the model¶

Let $F_\theta: {\rm Mat}_{180\times 16}(\mathbb{R})\rightarrow \mathbb{R}^{24}$ be the nonlinear mapping given by the LSTM model, where $\theta \in\mathbb{R}^{9326}$ is the vector of parameters to fit (9,326 trainable parameters per the model summary)

The training, validation and test sets are encoded by the tensors $X_{180\times 16\times m}$ and $Y_{24\times 1\times m}$ for $m = m_{\rm train}, m_{\rm val}, m_{\rm test}$, as above. These tensors are, in each case, just $m$ samples of the ordered pairs in ${\rm Mat}_{180\times 16}(\mathbb{R})\times \mathbb{R}^{24}$: $$\{(X^1,Y^1),(X^2,Y^2), \dots,(X^m,Y^m)\}.$$

Loss function

For fixed $\theta$ and tensor pair $X$ and $Y$, we set the loss function in terms of the MAE metric with the $\ell_1$ norm:

$${\rm Loss}(X) = \frac{1}{m}\sum_{k=1}^m{\|F_\theta(X^k)-Y^k\|_1}$$

Hyperparameters
  • epochs (upper bound) = 25: An epoch is when an entire training set is passed through the NN only once.

  • batch size = 32: Total number of training examples present in a single batch.

  • patience = 5: Number of epochs with no improvement after which training will be stopped, in our case with respect to ${\rm Loss}(X_{\rm val})$.

  • learning rate $\alpha_{\rm epoch} = 1\times 10^{-3} \times 0.9 ^{\rm epoch}$

Training process

drawing

In [8]:
## Learning-rate schedule: alpha_epoch = 1e-3 * 0.9**epoch, so the learning
## rate shrinks geometrically at every epoch.
reduce_lr = keras.callbacks.LearningRateScheduler(lambda epoch: 1e-3 * 0.90 ** epoch)

## Persist the best weights (lowest "val_loss") seen during the run.
path_checkpoint = "./outputs/h5/model_checkpoint.h5"
modelckpt_callback = keras.callbacks.ModelCheckpoint(
    monitor="val_loss",
    filepath=path_checkpoint,
    verbose=1,
    save_weights_only=True,
    save_best_only=True,
)

## Stop after `patience` epochs with no "val_loss" improvement,
## to avoid overfitting.
es_callback = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=0,
    patience=5  # TODO: experiment with a smaller patience
)

modelo.compile(
    optimizer=keras.optimizers.Adam(),  # adaptive gradient descent with momentum
    loss="mae",  # cost function: mean absolute error
    # NOTE(review): 'accuracy' is uninformative for this regression task (it
    # stays ~0 in the logs); kept so the history keys remain unchanged.
    metrics=['accuracy']
)

history = modelo.fit(
    X_train,
    y_train,
    epochs=25,
    validation_data=(X_val, y_val),
    batch_size=32,
    # BUGFIX: modelckpt_callback was defined but never passed to fit(), so
    # the "best model" checkpoint described above was never written.
    callbacks=[reduce_lr, es_callback, modelckpt_callback]
)  # keep `history` to plot the learning curves later
Epoch 1/25
2023-03-23 23:12:30.634318: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8100
2023-03-23 23:12:31.645250: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x7f0a48015010 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-03-23 23:12:31.645286: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0
2023-03-23 23:12:32.045769: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-03-23 23:12:34.874571: I tensorflow/compiler/jit/xla_compilation_cache.cc:477] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
4025/4025 [==============================] - 37s 7ms/step - loss: 0.0498 - accuracy: 1.3977e-04 - val_loss: 0.0361 - val_accuracy: 0.0000e+00 - lr: 0.0010
Epoch 2/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0333 - accuracy: 1.3977e-04 - val_loss: 0.0291 - val_accuracy: 0.0000e+00 - lr: 9.0000e-04
Epoch 3/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0305 - accuracy: 1.3977e-04 - val_loss: 0.0287 - val_accuracy: 0.0000e+00 - lr: 8.1000e-04
Epoch 4/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0289 - accuracy: 1.3977e-04 - val_loss: 0.0284 - val_accuracy: 0.0000e+00 - lr: 7.2900e-04
Epoch 5/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0278 - accuracy: 1.3977e-04 - val_loss: 0.0252 - val_accuracy: 0.0000e+00 - lr: 6.5610e-04
Epoch 6/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0262 - accuracy: 1.3977e-04 - val_loss: 0.0259 - val_accuracy: 0.0000e+00 - lr: 4.7830e-04
Epoch 9/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0258 - accuracy: 1.3977e-04 - val_loss: 0.0254 - val_accuracy: 0.0000e+00 - lr: 4.3047e-04
Epoch 10/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0256 - accuracy: 1.3977e-04 - val_loss: 0.0248 - val_accuracy: 0.0000e+00 - lr: 3.8742e-04
Epoch 11/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0253 - accuracy: 1.3977e-04 - val_loss: 0.0241 - val_accuracy: 0.0000e+00 - lr: 3.4868e-04
Epoch 12/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0251 - accuracy: 1.3977e-04 - val_loss: 0.0250 - val_accuracy: 0.0000e+00 - lr: 3.1381e-04
Epoch 13/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0249 - accuracy: 1.3977e-04 - val_loss: 0.0245 - val_accuracy: 0.0000e+00 - lr: 2.8243e-04
Epoch 14/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0248 - accuracy: 1.3977e-04 - val_loss: 0.0246 - val_accuracy: 0.0000e+00 - lr: 2.5419e-04
Epoch 15/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0246 - accuracy: 1.3977e-04 - val_loss: 0.0247 - val_accuracy: 0.0000e+00 - lr: 2.2877e-04
Epoch 16/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0245 - accuracy: 1.3977e-04 - val_loss: 0.0234 - val_accuracy: 0.0000e+00 - lr: 2.0589e-04
Epoch 17/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0244 - accuracy: 1.3977e-04 - val_loss: 0.0236 - val_accuracy: 0.0000e+00 - lr: 1.8530e-04
Epoch 18/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0243 - accuracy: 1.3977e-04 - val_loss: 0.0249 - val_accuracy: 0.0000e+00 - lr: 1.6677e-04
Epoch 19/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0242 - accuracy: 1.3977e-04 - val_loss: 0.0243 - val_accuracy: 0.0000e+00 - lr: 1.5009e-04
Epoch 20/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0242 - accuracy: 1.3977e-04 - val_loss: 0.0239 - val_accuracy: 0.0000e+00 - lr: 1.3509e-04
Epoch 21/25
4025/4025 [==============================] - 29s 7ms/step - loss: 0.0241 - accuracy: 1.3977e-04 - val_loss: 0.0244 - val_accuracy: 0.0000e+00 - lr: 1.2158e-04

4. Model validation¶

In [9]:
# Metrics recorded per epoch by model.fit (loss / accuracy / val_* / lr).
print(history.history.keys())
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy', 'lr'])
In [10]:
# Learning curves: training vs. validation loss per epoch.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='val')
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()
In [11]:
## 5.   Model test results
In [12]:
# Exogenous test-set table (includes the timestamps) built in the PCA notebook.
df_test = pd.read_csv('./outputs/PCA/pca_exogenas_test.csv')
# Discard the CSV's saved index column.
df_test = df_test.drop(columns=['Unnamed: 0'])
In [15]:
# Predict over the test tensor, then sanity-check that the flattened target,
# prediction and timestamp vectors all have the same length (8568 hours).
# The first n_pasado + n_salto timestamps have no forecast, hence the slice.
y_est = modelo.predict(X_test)
y_test[:,:,0].ravel().shape, y_est[:,:,0].ravel().shape, df_test['Date_timed'][n_pasado + n_salto:].shape
12/12 [==============================] - 0s 4ms/step
Out[15]:
((8568,), (8568,), (8568,))
In [16]:
# Load the fitted scaler objects from preprocessing, keyed by feature name.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
scalers = pk.load(open("./outputs/DataProcessing/scalers.pkl",'rb'))
In [17]:
# Undo the normalization of the demand column to recover real-world units.
scaler = scalers['Energy_Demand']
# .reshape(-1, 1) already flattens the 2-D slice into a column vector;
# the intermediate .ravel() call was redundant and has been removed.
yr = scaler.inverse_transform(y_test[:, :, 0].reshape(-1, 1))  # actual demand
yh = scaler.inverse_transform(y_est[:, :, 0].reshape(-1, 1))   # estimated demand
In [18]:
# Peek at the last rows; the test table ends on 2022-09-18 23:00.
df_test.tail(3)
Out[18]:
PC1_Weather PC2_Weather Date_timed Energy_Demand Day Hour Month Tmax-Cab Tmax-Hmo Tmax-Obr ... Monday_Holiday Tuesday_Aft_Hol Easter_week May_1s May_10t Sept_16 Nov_2nd Before_Christmas_NY Christmas_NY After_Christmas_NY
8757 -2.350991 -0.585325 2022-09-18 21:00:00 4547.73 6 21 9 36.0 42.5 41.0 ... 0 0 0 0 0 0 0 0 0 0
8758 -2.350991 -0.585325 2022-09-18 22:00:00 4475.52 6 22 9 36.0 42.5 41.0 ... 0 0 0 0 0 0 0 0 0 0
8759 -2.350991 -0.585325 2022-09-18 23:00:00 4328.62 6 23 9 36.0 42.5 41.0 ... 0 0 0 0 0 0 0 0 0 0

3 rows × 31 columns

In [19]:
# Pair actual and estimated demand with their timestamps. The Date_timed
# series is sliced past the first n_pasado + n_salto rows, which have no
# forecast (they fall inside the look-back window plus the gap).
# NOTE(review): mixing plain arrays with an index-carrying Series makes
# pandas take the Series' index and place the arrays positionally —
# works here because all three have the same length, but it is fragile.
df_est = pd.DataFrame({
    "Actual": yr.ravel(),
    "Estimated": yh.ravel(),
    "Date_timed": df_test["Date_timed"][n_pasado + n_salto:]     
})
In [20]:
# Parse the timestamps (read back from CSV as strings) and extract the
# hour of day, used below to detect the end of each day (Hour == 23).
df_est['Date_timed']= pd.to_datetime(df_est['Date_timed'])
df_est["Hour"] = df_est['Date_timed'].dt.hour
In [21]:
# Daily error metrics: accumulate each day's (Actual, Estimated) pairs and,
# at the last hour of the day (Hour == 23), record that day's MAPE and
# Pearson correlation.
#
# Rewritten because DataFrame.append() was removed in pandas 2.0 and growing
# a frame row-by-row is quadratic: collect plain records, build the frame once.
# (The dead `n = 0` / `n += 1` bookkeeping — n was overwritten from the row
# every iteration — has also been dropped.)
records = []
real = []
estimada = []

for _, row in df_est.iterrows():
    real.append(row['Actual'])
    estimada.append(row['Estimated'])

    if row['Hour'] == 23:
        records.append({
            "Date_timed": row['Date_timed'],
            "MAPE": mean_absolute_percentage_error(real, estimada) * 100,
            # NOTE(review): pearsonr returns r, not r**2, despite the
            # column name "R2" — value kept as-is for comparability.
            "R2": stats.pearsonr(real, estimada)[0],
        })
        real = []
        estimada = []

df_est_result = pd.DataFrame(records, columns=['Date_timed', 'MAPE', 'R2'])
In [22]:
# Daily MAPE / Pearson-r table: one row per completed test day.
df_est_result
Out[22]:
Date_timed MAPE R2
0 2021-09-27 23:00:00 1.495858 0.993766
1 2021-09-28 23:00:00 3.342752 0.997178
2 2021-09-29 23:00:00 2.657529 0.994140
3 2021-09-30 23:00:00 1.513762 0.994961
4 2021-10-01 23:00:00 0.803945 0.997340
... ... ... ...
352 2022-09-14 23:00:00 2.233575 0.993633
353 2022-09-15 23:00:00 1.578448 0.983501
354 2022-09-16 23:00:00 3.753194 0.974003
355 2022-09-17 23:00:00 2.096500 0.980849
356 2022-09-18 23:00:00 4.107416 0.976197

357 rows × 3 columns

In [23]:
# Persist the daily metrics and the hourly actual-vs-estimated table for
# cross-model comparison. NOTE(review): file suffix "25_5" presumably encodes
# 25 LSTM units / patience 5 — confirm the naming convention.
df_est_result.to_csv('./outputs/Training_comparativos/corrida_pcagral_25_5.csv')
df_est.to_csv('./outputs/Training_comparativos/tabla_original_pcagral_25_5.csv')
In [24]:
# Midnight-truncated date per row, used below to slice by season.
# dt.normalize() replaces the former string round-trip
# (datetime -> str -> str[:10] -> datetime) with one vectorized call.
df_est_result['Date'] = pd.to_datetime(df_est_result['Date_timed']).dt.normalize()

Seasons splits¶

In [25]:
# Slice the daily metrics into seasons. The test period runs
# 2021-09-27 .. 2022-09-18, so "autumn" is the 2021 autumn.
df_est_result_summer = df_est_result[(df_est_result['Date'] >= '2022-06-21') 
                   & (df_est_result['Date'] < '2022-09-23')]

# BUGFIX: the autumn upper bound was '2022-12-21', which made the "autumn"
# subset span the entire test period (overlapping winter, spring and summer).
df_est_result_autumn = df_est_result[(df_est_result['Date'] >= '2021-09-23') 
                   & (df_est_result['Date'] < '2021-12-21')]

df_est_result_winter = df_est_result[(df_est_result['Date'] >= '2021-12-21') 
                   & (df_est_result['Date'] < '2022-03-21')]

df_est_result_spring = df_est_result[(df_est_result['Date'] >= '2022-03-21') 
                   & (df_est_result['Date'] < '2022-06-21')]
In [27]:
# Violin plot of the daily MAPE distribution per season.
# The four copy-pasted add_trace calls are replaced by one data-driven loop.
fig = go.Figure()

# (season frame, trace name, fill RGBA, line color)
season_specs = [
    (df_est_result_spring, 'SPRING MAPE', "rgba(158, 202, 57,0.5)", "#9eca39"),
    (df_est_result_summer, 'SUMMER MAPE', "rgba(249, 190, 4,0.5)", "#F9BE04"),
    (df_est_result_autumn, 'AUTUMN MAPE', "rgba(198, 111, 66,0.5)", "#C66F42"),
    (df_est_result_winter, 'WINTER MAPE', "rgba(112, 163, 187,0.5)", "#70A3BB"),
]

for season_df, trace_name, fill, line_color in season_specs:
    fig.add_trace(go.Violin(y=season_df['MAPE'],
                            name=trace_name,
                            box_visible=True,
                            meanline_visible=True,
                            fillcolor=fill,
                            line=dict(color=line_color)))

fig.update_layout(title_text="MAPE 4 Seasons", height=600)
fig.show(renderer = "notebook")
#iplot(fig, image='svg', filename='violinplotmapelstmendoderdecoder', image_width=1280, image_height=640)

6. Model export¶

H5 is a file format to store structured data, it's not a model by itself. Keras saves models in this format as it can easily store the weights and model configuration in a single file.

In [28]:
# Save the full model (architecture + weights) in HDF5 format.
modelo.save('./outputs/h5/modelo.h5')
In [ ]:
# Render the institutional logo (embed=True stores the image in the notebook
# so it displays without network access).
from IPython import display
display.Image("https://mcd.unison.mx/wp-content/themes/awaken/img/logo_mcd.png", embed = True)

Maestría en Ciencia de Datos | Universidad de Sonora Blvd. Luis Encinas y Rosales s/n Col. Centro. Edificio 3K1 planta baja C.P. 83000, Hermosillo, Sonora, México mcd@unison.mx Tel: +52 (662) 259 2155